#!/usr/bin/env python3
import os
import sys
import argparse
import hashlib
import re
import yaml
import json
import filecmp
from contextlib import contextmanager

# The purpose of this code is to automatically generate various parts
# of the QPDFJob class. It is fairly complicated and extremely
# bespoke, so understanding it is important if modifications are to be
# made.

# Documentation of QPDFJob is divided among three places:
#
# * "HOW TO ADD A COMMAND-LINE ARGUMENT" in README-maintainer provides
#   a quick reminder for how to add a command-line argument
#
# * This file has a detailed explanation about how QPDFJob and
#   generate_auto_job work together
#
# * The manual ("QPDFJob Design" in qpdf-job.rst) discusses the design
#   approach, rationale, and evolution of QPDFJob.
#
# QPDFJob solved the problem of moving extensive functionality that
# lived in qpdf.cc into the library. The QPDFJob class consists of
# four major sections:
#
# * The run() method and its subsidiaries are responsible for
#   performing the actual operations on PDF files. This is implemented
#   in QPDFJob.cc
#
# * The nested Config class and the other classes it creates provide
#   an API for setting up a QPDFJob instance and correspond to the
#   command-line arguments of the qpdf executable. This is implemented
#   in QPDFJob_config.cc
#
# * The argument parsing code reads an argv array and calls
#   configuration methods. This is implemented in QPDFJob_argv.cc. The
#   argument parsing logic itself is implemented in the QPDFArgParser
#   class.
#
# * The job JSON handling code, which reads a QPDFJob JSON file and
#   calls configuration methods. This is implemented in
#   QPDFJob_json.cc. The JSON parsing code is in the JSON class. A
#   sax-like JSON handler class that calls callbacks in response to
#   items in the JSON is implemented in the JSONHandler class.
#
# This code has the job of ensuring that configuration, command-line
# arguments, and JSON are all consistent and complete so that a
# developer or user can freely move among those different ways of
# interacting with QPDFJob in a predictable fashion. In addition, help
# information for each option appears in manual/cli.rst, and that
# information is used in the creation of the job JSON schema and to supply
# help text to QPDFArgParser. This code also ensures that there is an
# exact match between options in job.yml and options in cli.rst.
#
# The job.yml file contains the data that drives this code. To
# understand job.yml, here are some important concepts.
#
# QPDFArgParser option table. There is support for positional
# arguments, options consisting of flags and optional parameters, and
# subparsers that start with a regular parameterless flag, have their
# own positional and option sections, and are terminated with -- by
# itself. Examples of this include --encrypt and --pages. An "option
# table" contains an optional positional argument handler and a list
# of valid options with specifications about their parameters. There
# are three kinds of option tables:
#
# * The built-in "help" option table contains help commands, like
#   --help and --version, that are only valid when they appear as the
#   single command-line argument.
#
# * The "main" option table contains the options that are valid
#   starting at the beginning of argument parsing.
#
# * A named option table can be started manually by the argument
#   parsing code to switch the argument parser's context. Switching
#   the parser to a new option table is manual (via a call to
#   selectOptionTable). Context reverts to the main option table
#   automatically when -- is encountered.
#
# In QPDFJob.hh, there is a Config class for each option table except
# help.
#
# Option type: bare, required/optional parameter, required/optional
# choices. A bare argument is just a flag, like --qdf. A parameter
# option takes an arbitrary parameter, like --password. A choices
# option takes one of a fixed list of choices, like --object-streams.
# If a parameter or choices option's parameter is optional, the empty
# string may be specified as an option, such as --collate (or
# --collate=). For a bare option, --option= is always the same as just
# --option. This makes it possible to switch an option from bare to
# optional choice to optional parameter all without breaking
# compatibility.
#
# JSON "schema". This is a qpdf-specific "schema" for JSON. It is not
# related to any kind of standard JSON schema. It is described in
# JSON.hh and in the manual. QPDFJob uses the JSON "schema" in a mode
# in which keys in the schema are all optional in the JSON object.
#
# Here is the mapping between configuration, argv, and JSON.
#
# The help options table is implemented solely for argv processing and
# has no counterpart in configuration or JSON.
#
# The config() method returns a shared pointer to a Config object.
# Every command-line option in the main option table has a
# corresponding method in Config whose name is the option converted to
# camel case. For bare options and options with optional parameters, a
# version exists that takes no arguments. For other than bare options,
# a version exists, possibly in addition, that takes a std::string
# const&. For example, the --qdf flag implies a qdf() method in
# Config, and the --object-streams flag implies an
# objectStreams(std::string const&) method in Config. For flags in
# option tables, the method is declared inside a config class specific
# to the option table. The mapping between option tables and config
# classes is explicit in job.yml. Positional arguments are handled
# individually and manually -- see QPDFJob.hh in the CONFIGURATION
# section for details. See examples/qpdf-job.cc for an example.
#
# To understand the rest, start at main and follow comments in the
# code.

# Base name of the path this script was invoked with. It is embedded
# in the generated-file banners below and in diagnostic messages, and
# it is also listed in Main.SOURCES.
whoami = os.path.basename(sys.argv[0])
# Comment header printed at the top of the generated C++ header
# files. It has no trailing newline; callers emit it with print().
BANNER = f'''//
// This file is automatically generated by {whoami}.
// Edits will be automatically overwritten if the build is
// run in maintainer mode.
//
// clang-format off
//'''

# Counterpart of BANNER for the generated man page, using the roff
# comment syntax (.\"). Unlike BANNER it ends with a newline, so it is
# written with end=''.
MAN_BANNER = f'''.\\"
.\\" This file is automatically generated by {whoami}.
.\\" Edits will be automatically overwritten if the build is
.\\" run in maintainer mode.
.\\"
'''

def warn(*args, **kwargs):
    """Print a diagnostic message to standard error.

    Accepts the same positional and keyword arguments as print(); the
    caller must not pass file=, since the destination is fixed.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)


@contextmanager
def write_file(filename):
    """Write `filename` through a temporary file, replacing it only on change.

    Yields a text-mode file object open on `filename + '.tmp'`. When the
    caller's `with` block finishes normally, the temporary file is
    compared byte-for-byte with the existing `filename`:

    * identical contents: the temporary file is removed and `filename`
      is left untouched, so its timestamp does not change;
    * otherwise (or if `filename` does not exist): the temporary file
      is moved over `filename`.

    If the `with` block raises, the temporary file is removed,
    `filename` is left as it was, and the exception propagates.
    """
    tmpfile = filename + '.tmp'
    try:
        with open(tmpfile, 'w') as f:
            yield f
    except BaseException:
        # Don't leave a partially written temporary file behind when
        # generation fails part way through.
        try:
            os.unlink(tmpfile)
        except FileNotFoundError:
            pass
        raise
    if os.path.exists(filename) and filecmp.cmp(filename, tmpfile, False):
        os.unlink(tmpfile)
    else:
        # os.replace overwrites an existing destination on every
        # platform; os.rename raises on Windows if filename exists.
        os.replace(tmpfile, filename)


class Main:
    # SOURCES is a list of source files whose contents are used by
    # this program. If they change, we are out of date. The names are
    # opened exactly as written when checksums are computed, so they
    # are resolved against the current working directory.
    SOURCES = [
        # Keep this list in sync with CMakeLists.txt: auto_job_inputs
        whoami,  # this script itself, by the name it was invoked as
        'CMakeLists.txt',
        'manual/_ext/qpdf.py',
        'job.yml',
        'manual/cli.rst',
        'manual/qpdf.1.in',
    ]
    # DESTS is a map to the output files this code generates. These
    # generated files, as well as those added to DESTS later in the
    # code, are included in various places by QPDFJob.hh or any of the
    # implementing QPDFJob*.cc files.
    DESTS = {
        # Keep this list in sync with CMakeLists.txt: auto_job_outputs
        'decl': 'libqpdf/qpdf/auto_job_decl.hh',
        'init': 'libqpdf/qpdf/auto_job_init.hh',
        'help': 'libqpdf/qpdf/auto_job_help.hh',
        'schema': 'libqpdf/qpdf/auto_job_schema.hh',
        'json_decl': 'libqpdf/qpdf/auto_job_json_decl.hh',
        'json_init': 'libqpdf/qpdf/auto_job_json_init.hh',
        'man': 'manual/qpdf.1',
        # Others are added in top(): one entry per "config" named in
        # job.yml, mapping to include/qpdf/auto_job_<config>.hh
    }
    # SUMS contains a checksum for each source and destination and is
    # used to detect whether we're up to date without having to force
    # recompilation all the time. This way the build can invoke this
    # script unconditionally without causing stuff to rebuild every
    # time.
    SUMS = 'job.sums'

    def main(self, args=sys.argv[1:], prog=whoami):
        """Entry point: parse the command line and run the chosen mode.

        args defaults to the process arguments (captured when the class
        is defined) and prog to the name the script was invoked as.
        """
        self.top(self.parse_args(args, prog))

    def parse_args(self, args, prog):
        """Parse the command line and return the argparse namespace.

        Exactly one of --check or --generate must be given; argparse
        exits with a usage error otherwise. The returned namespace has
        boolean attributes `check` and `generate`.
        """
        parser = argparse.ArgumentParser(
            prog=prog,
            description='Generate files for QPDFJob',
        )
        mxg = parser.add_mutually_exclusive_group(required=True)
        # --check never writes anything: top() exits with an error when
        # the checksums don't match, so the help text must not claim
        # that checksums get updated.
        mxg.add_argument('--check',
                         help='verify that generated files are up to date;'
                         ' exit with an error if not',
                         action='store_true', default=False)
        mxg.add_argument('--generate',
                         help='generate files from sources',
                         action='store_true', default=False)
        return parser.parse_args(args)

    def top(self, options):
        """Load job.yml and either verify or regenerate the outputs.

        options is the namespace returned by parse_args. If the stored
        checksums already match, the process exits with status 0 in
        either mode. Otherwise --check exits with an error message and
        --generate runs generate().
        """
        with open('job.yml', 'r') as f:
            data = yaml.safe_load(f.read())
        # config_decls maps a config key from an option in "options"
        # (from job.yml) to a list of declarations. A declaration is
        # generated for each config method for that option table.
        self.config_decls = {}
        # Keep track of which configs we've declared since we can have
        # option tables share a config class, as with the encryption
        # tables.
        self.declared_configs = set()

        # Update DESTS -- see above. This ensures that each config
        # class's contents are included in job.sums. Note that this
        # adds to the dict defined at class level.
        for o in data['options']:
            config = o.get('config', None)
            if config is not None:
                self.DESTS[config] = f'include/qpdf/auto_job_{config}.hh'
                self.config_decls[config] = []

        # Use sys.exit rather than the exit() helper: the latter is
        # injected by the site module for interactive use and is not
        # guaranteed to exist (e.g. under "python -S").
        if self.check_hashes():
            sys.exit(0)
        elif options.check:
            sys.exit(f'{whoami}: auto job inputs have changed')
        elif options.generate:
            self.generate(data)
        else:
            sys.exit(f'{whoami} unknown mode')

    def get_hashes(self):
        """Return {path: sha256 hex digest} for every existing input/output.

        Covers all of SOURCES and the values of DESTS, visited in
        sorted order. Files that do not exist are left out.
        """
        digests = {}
        for path in sorted(list(self.SOURCES) + list(self.DESTS.values())):
            try:
                with open(path, 'rb') as stream:
                    contents = stream.read()
            except FileNotFoundError:
                continue
            digests[path] = hashlib.sha256(contents).hexdigest()
        return digests

    def check_hashes(self):
        """Return True if the checksums stored in SUMS match the files on disk.

        The stored file has one "<path> <digest>" pair per line; lines
        that don't have that shape (such as the leading comment) are
        ignored. If the checksum file is missing or can't be read, the
        result is False and nothing is printed. When the file is
        readable but differs from the current state, each difference is
        listed on standard output.
        """
        hashes = self.get_hashes()
        old_hashes = {}
        try:
            with open(self.SUMS, 'r') as f:
                for line in f:
                    m = re.match(r'^(\S+) (\S+)\s*$', line)
                    if m:
                        old_hashes[m.group(1)] = m.group(2)
        except (OSError, UnicodeError):
            # No usable checksum file: treat everything as out of date.
            # Only read failures are handled here so that genuine
            # programming errors are not silently swallowed.
            return False
        if old_hashes == hashes:
            return True
        # Write to stdout, not stderr. What we write to stderr
        # is visible in a normal build. Writing to stdout will
        # hide it in that case but expose it if you directly
        # run ./generate_auto_job --check as in CI.
        print(f'*** {whoami} hash mismatches ***')
        for k, v in hashes.items():
            if k not in old_hashes:
                print(f'  {k} is not in job.sums')
            elif v != old_hashes[k]:
                print(f'  {k} was modified')
        for k in old_hashes:
            if k not in hashes:
                print(f'  {k} disappeared')
        return False

    def update_hashes(self):
        """Rewrite SUMS from the current state of all inputs and outputs.

        The file starts with a "# Generated by" comment line followed
        by one "<path> <digest>" line per entry from get_hashes().
        """
        lines = [f'# Generated by {whoami}']
        lines.extend(
            f'{path} {digest}'
            for path, digest in self.get_hashes().items())
        with open(self.SUMS, 'w') as out:
            for line in lines:
                out.write(line + '\n')

    def generate_doc(self, df, f, f_man):
        """Convert the help markup in cli.rst into C++ help code and man text.

        df is the open cli.rst file, f receives C++ source, and f_man
        receives man page (roff) text. The input is scanned line by
        line with a small state machine that recognizes
        ".. help-topic NAME: short text" blocks and
        ".. qpdf:option:: --name..." blocks containing a
        ".. help: short text" sub-block. The indented text that follows
        becomes the long help text.

        Besides writing to f and f_man, this method sets
        self.all_topics and self.referenced_topics, removes each
        documented option from self.options_without_help, and stores
        the short help text in self.jdata for non-help options.

        Raises Exception when an option appears before any topic, an
        option line ends with a stray backtick, an option has no help
        block, long text is missing, help is given for an unknown
        option, help text references a nonexistent --help topic, or
        some option has no help at all.
        """
        # Parser states
        st_top = 0          # looking for a help-topic or option directive
        st_topic = 1        # accumulating the long text of a help topic
        st_option = 2       # saw an option; waiting for its ".. help:" line
        st_option_help = 3  # accumulating the long text of an option
        state = st_top

        indent = None       # whitespace prefix expected on continuation lines
        topic = None        # most recent help topic
        option = None       # option currently being processed
        short_text = None   # one-line help for the current topic/option
        long_text = None    # multi-line help accumulated so far

        # Generate a bunch of short static functions rather than a big
        # member function for help. Some compilers have problems with
        # very large member functions in classes in anonymous
        # namespaces.

        help_files = 0      # number of add_help_N functions started
        help_lines = 0      # 0 means "start a new function on next line"

        self.all_topics = set(self.options_without_help)
        self.referenced_topics = set()

        def set_indent(x):
            # Continuation lines must be indented by the width of the
            # directive prefix (leading whitespace plus ".. ").
            nonlocal indent
            indent = ' ' * len(x)

        def append_long_text(line, topic):
            # Add one input line to long_text. Returns True when the
            # line is neither blank nor indented, which ends the block;
            # long_text is then stripped and newline-terminated, and
            # any --help=TOPIC references in it are recorded. Returns
            # False while the block continues.
            nonlocal indent, long_text
            if line == '\n':
                long_text += '\n'
            elif line.startswith(indent):
                long_text += line[len(indent):]
            else:
                long_text = long_text.strip()
                if long_text == '':
                    raise Exception(f'missing long text for {topic}')
                long_text += '\n'
                if 'help' not in topic:
                    # Help for --help itself has --help=... not
                    # referring to specific options.
                    for i in re.finditer(r'--help=([^\.\s]+)', long_text):
                        self.referenced_topics.add(i.group(1))
                return True
            return False

        def manify(text):
            # Convert "- item" bullet lists to roff .IP entries,
            # removing the two-space indent from the lines that
            # continue an item. All other lines pass through unchanged.
            lines = text.split('\n')
            out = []
            last_was_item = False
            for line in lines:
                if line.startswith('- '):
                    last_was_item = True
                    out.append('.IP \\[bu]')
                    out.append(line[2:])
                elif last_was_item and line.startswith('  '):
                    out.append(line[2:])
                else:
                    last_was_item = False
                    out.append(line)
            return '\n'.join(out)

        last_option_topic = ''
        lineno = 0
        for line in df.readlines():
            if help_lines == 0:
                # Close the previous add_help_N function, if any, and
                # open the next one.
                if help_files > 0:
                    print('}', file=f)
                help_files += 1
                help_lines += 1
                print(f'static void add_help_{help_files}(QPDFArgParser& ap)\n'
                      '{', file=f)
            lineno += 1
            if state == st_top:
                m = re.match(r'^(\s*\.\. )help-topic (\S+): (.*)$', line)
                if m:
                    set_indent(m.group(1))
                    topic = m.group(2)
                    short_text = m.group(3)
                    long_text = ''
                    state = st_topic
                    continue
                m = re.match(
                    r'^(\s*\.\. )qpdf:option:: (([^=\[\s]+)([\[= ](.+))?)$',
                    line)
                if m:
                    if topic is None:
                        raise Exception('option seen before topic')
                    set_indent(m.group(1))
                    option = m.group(3)
                    synopsis = m.group(2)
                    if synopsis.endswith('`'):
                        raise Exception(
                            f'stray ` at end of option line (line {lineno})')
                    # When the option takes a parameter, the long help
                    # text starts with the full synopsis line.
                    if synopsis != option:
                        long_text = synopsis + '\n'
                    else:
                        long_text = ''
                    state = st_option
                    continue
            elif state == st_topic:
                if append_long_text(line, topic):
                    self.all_topics.add(topic)
                    print(f'ap.addHelpTopic("{topic}", "{short_text}",'
                          f' R"({long_text})");', file=f)
                    print(f'.SH {topic.upper()} ({short_text})', file=f_man)
                    print(manify(long_text), file=f_man, end='')
                    help_lines += 1
                    state = st_top
            elif state == st_option:
                # Skip blank/indented lines until the ".. help:" line
                # appears; anything else means the option has no help.
                if line == '\n' or line.startswith(indent):
                    m = re.match(r'^(\s*\.\. )help: (.*)$', line)
                    if m:
                        set_indent(m.group(1))
                        short_text = m.group(2)
                        state = st_option_help
                else:
                    raise Exception('option without help text')
            elif state == st_option_help:
                if append_long_text(line, option):
                    if option in self.options_without_help:
                        self.options_without_help.remove(option)
                    else:
                        raise Exception(
                            f'help for unknown option {option},'
                            f' lineno={lineno}')
                    if option not in self.help_options:
                        # option[2:] drops the leading "--"
                        self.jdata[option[2:]]['help'] = short_text
                    print(f'ap.addOptionHelp("{option}", "{topic}",'
                          f' "{short_text}", R"({long_text})");', file=f)
                    if last_option_topic != topic:
                        print('.PP\nRelated Options:', file=f_man)
                    last_option_topic = topic
                    print(f'.TP\n.B {option} \\-\\- {short_text}', file=f_man)
                    print(manify(long_text), file=f_man, end='')
                    help_lines += 1
                    state = st_top
            # help_lines starts at 1 when a function is opened, so each
            # add_help_N function receives at most 19 entries.
            if help_lines == 20:
                help_lines = 0
        # NOTE(review): a topic or option block is only emitted when a
        # later non-blank, non-indented line ends it, so a block that
        # runs to the very end of the input appears not to be emitted
        # -- confirm cli.rst never ends that way.
        print('}', file=f)
        print('static void add_help(QPDFArgParser& ap)\n{', file=f)
        for i in range(help_files):
            print(f'    add_help_{i+1}(ap);', file=f)
        print('ap.addHelpFooter("For detailed help, visit'
              ' the qpdf manual: https://qpdf.readthedocs.io\\n");', file=f)
        print('}\n', file=f)
        print('''.SH SEE ALSO
.PP
For a summary of qpdf's options, please run \\fBqpdf \\-\\-help\\fR.
A complete manual can be found at https://qpdf.readthedocs.io.
''', file=f_man, end='')
        # Every --help=X mentioned in help text must name a real topic
        # or option.
        for i in self.referenced_topics:
            if i not in self.all_topics:
                raise Exception(f'help text referenced --help={i}')
        # Anything still in options_without_help was never documented;
        # the loop raises on the first iteration if the set is
        # non-empty.
        for i in self.options_without_help:
            raise Exception(
                'Options without help: ' +
                ', '.join(self.options_without_help))

    def generate(self, data):
        """Regenerate every output file from the parsed job.yml data.

        Validates the data, reads the project version from
        CMakeLists.txt, computes all declarations and initialization
        code, writes each destination through write_file, and finally
        records fresh checksums. Raises Exception if no version line
        is found in CMakeLists.txt.
        """
        warn(f'{whoami}: regenerating auto job files')
        self.validate(data)

        # The version is the second space-separated word of the last
        # line in CMakeLists.txt that starts with "VERSION ".
        version = None
        with open('CMakeLists.txt', 'r') as cmake:
            for raw in cmake.readlines():
                stripped = raw.strip()
                if stripped.startswith('VERSION '):
                    version = stripped.split(' ')[1]
        if version is None:
            raise Exception("can't read version from CMakeLists.txt")

        # Help options are handled specially and never appear in
        # job.yml, so seed the bookkeeping sets with the built-in ones.
        self.help_options = {'--completion-bash', '--completion-zsh', '--help'}
        # Options seen so far that have no help text yet; used to
        # report any option that is missing help.
        self.options_without_help = set(self.help_options)

        def write_with_banner(dest, lines):
            # Write the banner followed by one line per entry.
            with write_file(self.DESTS[dest]) as out:
                print(BANNER, file=out)
                for entry in lines:
                    print(entry, file=out)

        # Compute the information needed for generated files and write
        # the files.
        self.prepare(data)
        write_with_banner('decl', self.decls)
        write_with_banner('init', self.init)
        with write_file(self.DESTS['help']) as help_out, \
                write_file(self.DESTS['man']) as man_out:
            man_out.write(MAN_BANNER)
            with open('manual/qpdf.1.in', 'r') as template:
                for raw in template:
                    man_out.write(raw.replace('@PROJECT_VERSION@', version))
            with open('manual/cli.rst', 'r') as cli_rst:
                print(BANNER, file=help_out)
                self.generate_doc(cli_rst, help_out, man_out)

        # The json files come after the config and arg parsing files
        # because the schema needs full information about all the
        # options. Generating the schema also produces the contents of
        # the json header files.
        self.generate_schema(data)
        with write_file(self.DESTS['schema']) as schema_out:
            schema_text = json.dumps(
                self.schema, indent=2, separators=(',', ': '))
            print('static constexpr char const* JOB_SCHEMA_DATA = R"(' +
                  schema_text +
                  ')";', file=schema_out)
        for config, declarations in self.config_decls.items():
            write_with_banner(config, declarations)
        write_with_banner('json_decl', self.json_decls)
        write_with_banner('json_init', self.json_init)

        # Update hashes last to ensure that this will be rerun in the
        # event of a failure.
        self.update_hashes()
        # DON'T ADD CODE TO generate AFTER update_hashes
        # DON'T ADD CODE TO generate AFTER update_hashes

    def handle_trivial(self, i, identifier, cfg, prefix, kind, v):
        """Register a "trivial" option and declare its config method.

        A trivial option is one whose handler does nothing other than
        call the config method of the same name (in camelCase).

        i is the option name, identifier the config method name, cfg
        the C++ expression for the config object, prefix the prefix of
        the config class name, kind the option type, and v the value
        name (required_parameter) or choices name (choices kinds).
        Appends registration code to self.init and, the first time a
        given signature is seen, declarations to self.config_decls[cfg].
        """
        # Lambda used by every kind that forwards a parameter string.
        forward = f'[this](std::string const& x){{{cfg}->{identifier}(x);}}'
        registrations = {
            'bare':
                f'this->ap.addBare("{i}", '
                f'[this](){{{cfg}->{identifier}();}});',
            'required_parameter':
                f'this->ap.addRequiredParameter("{i}", {forward}, "{v}");',
            'optional_parameter':
                f'this->ap.addOptionalParameter("{i}", {forward});',
            'required_choices':
                f'this->ap.addChoices("{i}", {forward}, true, {v}_choices);',
            'optional_choices':
                f'this->ap.addChoices("{i}", {forward}, false, {v}_choices);',
        }
        if kind in registrations:
            self.init.append(registrations[kind])

        # Generate declarations for config methods separately by
        # config object.
        takes_parameter = kind != 'bare'
        needs_no_arg_overload = kind in (
            'optional_parameter', 'optional_choices')
        config_class = prefix + 'Config'
        parameter = 'std::string const& parameter' if takes_parameter else ''
        signature = f'{config_class}* {identifier}({parameter})'
        if signature in self.declared_configs:
            return
        self.declared_configs.add(signature)
        declarations = self.config_decls[cfg]
        declarations.append(f'QPDF_DLL {signature};')
        if needs_no_arg_overload:
            # Instead of making the parameter optional, declare an
            # additional overload that takes no arguments. That lets an
            # option move from bare to optional_parameter or
            # optional_choices without breaking binary compatibility.
            # Both overloads must be implemented by hand; nothing calls
            # them automatically, so a forgotten one only shows up as a
            # link error for whoever tries to use it.
            declarations.append(f'QPDF_DLL {config_class}* {identifier}();')

    def handle_flag(self, i, identifier, kind, v):
        """Declare and register a manually implemented option handler.

        Used for flags whose handlers must be written by hand in
        QPDFJob_argv.cc; a missing implementation shows up as a
        compiler/linker error. i is the option name, identifier the
        ArgParser method name, kind the option type, and v the value
        name or choices name where applicable. Appends the handler
        declaration to self.decls and its registration to self.init;
        an unrecognized kind adds nothing.
        """
        handler = f'&ArgParser::{identifier}'
        if kind == 'bare':
            declaration = f'void {identifier}();'
            registration = f'this->ap.addBare("{i}", b({handler}));'
        else:
            declaration = f'void {identifier}(std::string const&);'
            with_parameter = {
                'required_parameter':
                    f'this->ap.addRequiredParameter("{i}", '
                    f'p({handler}), "{v}");',
                'optional_parameter':
                    f'this->ap.addOptionalParameter("{i}", p({handler}));',
                'required_choices':
                    f'this->ap.addChoices("{i}", '
                    f'p({handler}), true, {v}_choices);',
                'optional_choices':
                    f'this->ap.addChoices("{i}", '
                    f'p({handler}), false, {v}_choices);',
            }
            registration = with_parameter.get(kind)
            if registration is None:
                return
        self.decls.append(declaration)
        self.init.append(registration)

    def prepare(self, data):
        """Compute declarations and initialization code from job.yml data.

        Resets and fills self.decls, self.init, self.json_decls,
        self.json_init, self.jdata and self.by_table, and adds every
        option seen to self.options_without_help (help-table options
        are also added to self.help_options). Handler generation is
        delegated to handle_flag and handle_trivial.
        """
        self.decls = []         # argv handler declarations
        self.init = []          # initialize arg parsing code
        self.json_decls = []    # json handler declarations
        self.json_init = []     # initialize json handlers
        self.jdata = {}         # running data used for json generate
        self.by_table = {}      # table information by name for easy lookup

        # Tables that are selected rather than registered and that get
        # neither a name constant nor an end handler.
        builtin_tables = ('main', 'help')

        def record_flag(flag, table, details):
            # Remember each flag and the tables it appears in so the
            # json information can be checked against the options
            # section. Help-table flags are only noted as help options.
            if table == 'help':
                self.help_options.add(f'--{flag}')
                return
            entry = self.jdata.setdefault(flag, {'tables': {}})
            entry['tables'][table] = details

        # helper functions
        self.init.extend([
            'auto b = [this](void (ArgParser::*f)()) {',
            '    return QPDFArgParser::bindBare(f, this);',
            '};',
            'auto p = [this](void (ArgParser::*f)(std::string const&)) {',
            '    return QPDFArgParser::bindParam(f, this);',
            '};',
            '',
        ])

        # static variables for each set of choices for choices options
        for name, values in data['choices'].items():
            quoted = ''.join(f'"{value}", ' for value in values)
            array = (f'static char const* {name}_choices[] = {{' +
                     quoted + '0};')
            self.init.append(array)
            self.json_init.append(array)
        self.init.append('')
        self.json_init.append('')

        # constants for the table names to reduce hard-coding strings
        # in the handlers
        for o in data['options']:
            table = o['table']
            if table not in builtin_tables:
                constant = self.to_identifier(table, 'O', True)
                self.decls.append(
                    f'static constexpr char const* {constant} = "{table}";')
        self.decls.append('')

        # Walk through all the options, adding declarations for the
        # option handlers and initialization code that registers the
        # handlers with QPDFArgParser. In "trivial" cases the
        # generated code calls the corresponding config method
        # directly; otherwise a handler is declared that has to be
        # implemented explicitly.

        # A new option table needs config set to the name of a member
        # variable declared in the ArgParser class in QPDFJob_argv.cc,
        # plus an option in the main table, also listed as manual in
        # job.yml, that switches to it. The existing options that do
        # this serve as examples.
        flag_sections = (
            ('bare', False),
            ('required_parameter', True),
            ('optional_parameter', False),
            ('required_choices', True),
            ('optional_choices', True),
        )
        for o in data['options']:
            table = o['table']
            config = o.get('config', None)
            table_prefix = o.get('prefix', '')
            arg_prefix = 'arg' + table_prefix
            config_prefix = o.get('config_prefix', table_prefix)
            manual = o.get('manual', [])
            json_prefix = table_prefix or table
            self.by_table[json_prefix] = {
                'config': config,
                'manual': manual,
            }
            if table == 'main':
                self.init.append('this->ap.selectMainOptionTable();')
            elif table == 'help':
                self.init.append('this->ap.selectHelpOptionTable();')
            else:
                end_handler = self.to_identifier(table, 'argEnd', False)
                self.init.append(f'this->ap.registerOptionTable("{table}",'
                                 f' b(&ArgParser::{end_handler}));')
            if o.get('positional', False):
                self.decls.append(
                    f'void {arg_prefix}Positional(std::string const&);')
                self.init.append('this->ap.addPositional('
                                 f'p(&ArgParser::{arg_prefix}Positional));')

            # Collect this table's flags in section order. Sections
            # that carry a value are mappings of flag -> value; the
            # others are plain lists of flags.
            flags = {}
            for section, has_value in flag_sections:
                if has_value:
                    for flag, value in o.get(section, {}).items():
                        flags[flag] = [section, value]
                else:
                    for flag in o.get(section, []):
                        flags[flag] = [section, None]

            for flag, (kind, value) in flags.items():
                self.options_without_help.add(f'--{flag}')
                record_flag(flag, json_prefix, [kind, value])
                if config is None or flag in manual:
                    handler = self.to_identifier(flag, arg_prefix, False)
                    self.handle_flag(flag, handler, kind, value)
                else:
                    method = self.to_identifier(flag, '', False)
                    self.handle_trivial(
                        flag, method, config, config_prefix, kind, value)

            # Subsidiary option tables need end methods for any final
            # checking within the option table. Final checking for the
            # main option table is handled by checkConfiguration, which
            # is called explicitly in the QPDFJob code.
            if table not in builtin_tables:
                end_handler = self.to_identifier(table, 'argEnd', False)
                self.decls.append(f'void {end_handler}();')

    def handle_json_trivial(self, flag_key, fdata):
        """Append the JSON handler registration for a trivial option.

        flag_key is the config method name and fdata the entry from
        self.jdata for the flag. The config object, kind and value are
        taken from the last table listed in fdata['tables']. One line
        is appended to self.json_init; an unrecognized kind adds
        nothing.
        """
        config = None
        for table, (kind, v) in fdata['tables'].items():
            # All tables, if there are several, have already been
            # determined to share the same config.
            config = self.by_table[table]['config']

        forward = (f'[this](std::string const& p)'
                   f' {{ {config}->{flag_key}(p); }}')
        if kind == 'bare':
            line = f'addBare([this]() {{ {config}->{flag_key}(); }});'
        elif kind in ('required_parameter', 'optional_parameter'):
            # An optional parameter simply arrives as the empty string,
            # which the handler has to cope with. The empty string is
            # accepted for required parameters as well.
            line = f'addParameter({forward});'
        elif kind in ('required_choices', 'optional_choices'):
            required = 'true' if kind == 'required_choices' else 'false'
            line = f'addChoices({v}_choices, {required}, {forward});'
        else:
            return
        self.json_init.append(line)

    def handle_json_manual(self, path):
        method = re.sub(r'\.([a-zA-Z0-9])',
                        lambda x: x.group(1).upper(),
                        f'setup{path}')
        self.json_decls.append(f'void {method}();')
        self.json_init.append(f'{method}();')

    def option_to_json_key(self, s):
        return self.to_identifier(s, '', False)

    def flag_to_schema_key(self, k):
        if k.startswith('_'):
            schema_key = k[1:]
        else:
            schema_key = re.sub(r'[^\.]+\.', '', k)
        return self.option_to_json_key(schema_key)

    def build_schema(self, j, path, flag, expected, options_seen):
        """Recursively build the job JSON schema for one node of the
        json data.

        While walking j, this also appends handler declarations to
        self.json_decls and handler registrations to self.json_init,
        and adds each recognized option to options_seen.

        Returns the schema value for this node: a dict for a dict
        node, a one-element list for a list node, and for any other
        node either j itself or, when j is None, the option's help
        text with each "--option" rewritten to its JSON key. Returns
        None for a flag that starts with "__".

        Raises Exception for a string-valued key that is not an
        expected option and does not start with "_", for any other
        non-empty key that is neither an expected option nor starts
        with "_", and for a list whose length is not 1.
        """
        # j: the part of data from "json" in job.yml as we traverse it
        # path: a string representation of the path in the json
        # flag: the command-line flag
        # expected: a map of command-line options we expect to eventually see
        # options_seen: which options we have seen so far

        # As described in job.yml, the json can have keys that don't
        # map to options. This includes keys whose values are
        # dictionaries as well as keys that correspond to positional
        # arguments. These start with _ and get their help from
        # job.yml. Things that correspond to options get their help
        # from the help text we gathered from cli.rst.

        if flag in expected:
            options_seen.add(flag)
        elif flag.startswith('__'):
            # This marks a flag that has no JSON equivalent because it
            # is handled in some other fashion.
            options_seen.add(flag[2:])
            return
        elif isinstance(j, str):
            if not flag.startswith('_'):
                raise Exception(f'json: {flag} has a description'
                                ' but doesn\'t start with _')
        elif not (flag == '' or flag.startswith('_')):
            raise Exception(f'json: unknown key {flag}')

        # The logic here is subtle and makes sense if you understand
        # how our JSON schemas work. They are described in JSON.hh,
        # but basically, if you see a dictionary, the schema should
        # have a dictionary with the same keys whose values are
        # descriptive. If you see an array, the array should have a
        # single member that describes each element of the array. See
        # JSON.hh for details.

        # See comments in QPDFJob_json.cc in the Handlers class
        # declaration to understand how and why the methods called
        # here work. The idea is that Handlers keeps a stack of
        # JSONHandler shared pointers so that we can register our
        # handlers in the right place as we go.
        if isinstance(j, dict):
            schema_value = {}
            # The top-level call passes an empty flag, so no
            # begin/end handlers are declared for the outermost
            # dictionary.
            if flag:
                identifier = self.to_identifier(path, '', False)
                self.json_decls.append(f'void begin{identifier}(JSON);')
                self.json_decls.append(f'void end{identifier}();')
                self.json_init.append(
                    f'beginDict(bindJSON(&Handlers::begin{identifier}),'
                    f' bindBare(&Handlers::end{identifier})); // {path}')
            for k, v in j.items():
                schema_key = self.flag_to_schema_key(k)
                subpath = f'{path}.{schema_key}'
                # Everything the recursive call registers is emitted
                # between this pushKey and the matching popHandler.
                self.json_init.append(f'pushKey("{schema_key}");')
                schema_value[schema_key] = self.build_schema(
                    v, subpath, k, expected, options_seen)
                self.json_init.append(f'popHandler(); // key: {schema_key}')
        elif isinstance(j, list):
            if len(j) != 1:
                raise Exception('json contains array with length != 1')
            identifier = self.to_identifier(path, '', False)
            self.json_decls.append(f'void begin{identifier}Array(JSON);')
            self.json_decls.append(f'void end{identifier}Array();')
            self.json_init.append(
                f'beginArray(bindJSON(&Handlers::begin{identifier}Array),'
                f' bindBare(&Handlers::end{identifier}Array));'
                f' // {path}[]')
            # The single array element is processed with the same path
            # and flag as the array itself.
            schema_value = [
                self.build_schema(j[0], path, flag,
                                  expected, options_seen)
            ]
            self.json_init.append(
                f'popHandler(); // array: {path}[]')
        else:
            schema_value = j
            if schema_value is None:
                # No description was given, so use the option's help
                # text, rewriting each --option to its JSON key.
                # NOTE(review): expected[flag] raises KeyError if j is
                # None for a flag that is not in expected (for
                # example, a _-prefixed key with no description).
                schema_value = re.sub(
                    r'--([^\s=]+)',
                    lambda x: self.option_to_json_key(x.group(1)),
                    expected[flag]['help'])
            # An option gets a generated handler only when it is an
            # expected option, no table lists it as manual, and every
            # table it appears in shares one config. Anything else is
            # routed to a setup method that is only declared and
            # called from here.
            is_trivial = False
            if flag in expected:
                is_trivial = True
                common_config = None
                for t in expected[flag]['tables']:
                    tdata = self.by_table[t]
                    if flag in tdata['manual']:
                        is_trivial = False
                    if common_config is None:
                        common_config = tdata['config']
                    elif common_config != tdata['config']:
                        is_trivial = False
            config_key = self.flag_to_schema_key(flag)
            if is_trivial:
                self.handle_json_trivial(config_key, expected[flag])
            else:
                self.handle_json_manual(path)
        return schema_value

    def generate_schema(self, data):
        # Check to make sure that every command-line option is
        # represented in data['json']. Build a list of options that we
        # expect. If an option appears once, we just expect to see it
        # once. If it appears in more than one options table, we need
        # to see a separate version of it for each option table. It is
        # represented in job.yml prepended with the table prefix. The
        # table prefix is removed in the schema. Example: "password"
        # appears multiple times, so the json section of job.yml has
        # main.password, uo.password, etc. But most options appear
        # only once, so we can just list them as they are. There is a
        # nearly exact match between option tables and dictionary in
        # the job json schema, but it's not perfect because of how
        # positional arguments are handled, so we have to do this
        # extra work. Information about which tables a particular
        # option appeared in is gathered up in prepare().
        expected = {}
        for k, v in self.jdata.items():
            tables = v['tables']
            if len(tables) == 1:
                expected[k] = {**v}
            else:
                for t in sorted(tables):
                    expected[f'{t}.{k}'] = {**v}
        options_seen = set()

        # Walk through the json information building the schema as we
        # go. This verifies consistency between command-line options
        # and the json section of the data and builds up a schema by
        # populating with help information as available. In addition
        # to generating the schema, we declare and register json
        # handlers that correspond with it. That way, we can first
        # check a job JSON file against the schema, and if it matches,
        # we have fewer error opportunities while calling handlers.
        self.schema = self.build_schema(
            data['json'], '', '', expected, options_seen)
        if options_seen != set(expected.keys()):
            raise Exception('missing from json: ' +
                            str(set(expected.keys()) - options_seen))

    def check_keys(self, what, d, exp):
        """Exit with an error unless d is a dict with only known keys.

        what: label used at the start of the error message
        d: the value to check
        exp: the set of keys that are allowed to appear in d

        Keys in exp that are absent from d are not an error. On
        failure this raises SystemExit with a descriptive message.
        """
        # Use sys.exit rather than the exit() helper that the site
        # module adds for interactive use; that helper is not
        # guaranteed to exist (for example under "python -S").
        if not isinstance(d, dict):
            sys.exit(f'{what} is not a dictionary')
        extra = set(d) - exp
        if extra:
            sys.exit(f'{what}: unknown keys = {extra}')

    def validate(self, data):
        self.check_keys('top', data, set(
            ['choices', 'options', 'json']))
        for o in data['options']:
            self.check_keys('top', o, set(
                ['table', 'prefix', 'config', 'config_prefix',
                 'manual', 'bare', 'positional',
                 'optional_parameter', 'required_parameter',
                 'required_choices', 'optional_choices']))

    def to_identifier(self, label, prefix, const):
        """Turn label into an identifier.

        label: the text to convert; every character that is not an
          ASCII letter or digit is treated as a word separator
        prefix: text placed before the label, or '' for none
        const: if true, return "<prefix>_<LABEL>" with the label
          upper-cased and separators kept as underscores; otherwise
          return a camelCase name in which each separator followed by
          a lower-case letter upper-cases that letter and all
          remaining underscores are dropped
        """
        sanitized = re.sub(r'[^a-zA-Z0-9]', '_', label)
        if const:
            return f'{prefix}_{sanitized.upper()}'

        snake = f'{prefix}_{sanitized}' if prefix else sanitized
        camel = re.sub(r'_([a-z])', lambda m: m.group(1).upper(), snake)
        return camel.replace('_', '')


if __name__ == '__main__':
    try:
        # Run from the directory that contains this script.
        os.chdir(os.path.dirname(os.path.realpath(__file__)))
        Main().main()
    except KeyboardInterrupt:
        # Exit quietly on Ctrl-C. 130 is the conventional status for
        # termination by SIGINT (128 + 2). Use sys.exit rather than
        # the exit() helper that the site module adds for interactive
        # use, which is not guaranteed to exist.
        sys.exit(130)
